function [last_episode,all_episodes]=simulation_exp_accommodation(alpha,beta,T,vh,vl,mu_l,sigma_l,grid_middle,tick,grid_size,BR)
%SIMULATION_EXP_ACCOMMODATION Runs one learning experiment of T episodes.
%
% In this accommodation case player 1 is an algorithmic market maker (AM)
% that learns with a single-state Q-vector, and player 2 is a human with a
% dynamic manipulation strategy aimed at "teaching" player 1 to play above
% the monopoly price.
%
% Inputs:
%   alpha       - weight of the new payoff in the Q-update
%                 Q(s) <- alpha*profit_1 + (1-alpha)*Q(s).
%   beta        - decay rate of the experimentation probability
%                 epsilon_t = exp(-beta*t).
%   T           - number of episodes.
%   vh, vl      - the two possible asset values, each drawn with prob. 0.5.
%   mu_l,sigma_l- mean and standard deviation of the normal shock l that is
%                 added to the asset value to obtain the investor valuation.
%   grid_middle, tick, grid_size
%               - define the price grid: index s = 1..2*grid_size+1 maps to
%                 the price (grid_middle - grid_size*tick - tick) + s*tick.
%   BR          - BR(s,1) is the price player 2 quotes by default when
%                 player 1 quotes the price with index s.
%
% Outputs:
%   last_episode - 1 x 6 vector with the values in the last episode T of
%                  A_1, A_2, profit 1, profit 2, v_tilde, l.
%   all_episodes - T x 12 matrix with the values in all episodes 1 to T of
%                  A_1, A_2, opt_1 (greedy price of player 1), Profit_1,
%                  Profit_2, a_min, followed by the squared values of these
%                  six variables.

%The accommodation rule is written for the grid described in the original
%design notes (prices 2 to 8, monopoly price 7): the price that player 2
%tries to sustain has index 7 and value 8. These constants are kept as-is.
accommodation_index = 7;
accommodation_price = 8;

%Grid helpers (loop-invariant).
n_prices     = 2*grid_size+1;
price_offset = grid_middle - grid_size*tick - tick;

%Pre-allocate
all_episodes = zeros(T,12);

%Initialize the Q-vector with random values higher than the monopoly
%expected profit.
Q_n = 3+(6-3)*rand(n_prices,1);

%Generate a vector of T observations with prob=0.5 to be vl and prob=0.5 to be vh.
v_tilde = randsample([vl, vh], T, true)';             %value of the asset in each episode
l  = normrnd(mu_l,sigma_l,T,1);                       %random draws of l in each episode
vc = v_tilde + l;                                     %investor valuations in each episode

%Experimentation probabilities and the resulting 0/1 draws:
%explore(t) == 1 if player 1 (AM) experiments in episode t.
epsilon = exp(-beta*(1:1:T));
explore = binornd(1,epsilon);

%Loop over all episodes.
for t = 1:T

    %Greedy price of player 1 in episode t.
    q_max      = max(Q_n(:,1));
    greedy_set = find(Q_n(:,1) == q_max);   %all indices attaining the maximum
        %Note: collecting all maximizers is irrelevant if the Q-vector has been
        %initialized with continuous random variables, but is important otherwise.
    s = greedy_set(randi([1 length(greedy_set)],1,1)); %randomize among multiple greedy prices
    all_episodes(t,3) = price_offset + s*tick;          %greedy price of player 1 at time t

    %Actual price chosen by player 1: random if experimenting, greedy otherwise.
    if explore(t) == 1
        s = randi([1 n_prices],1,1);                    %random index in 1..2*grid_size+1
        all_episodes(t,1) = price_offset + tick*s;      %corresponding price
    else
        all_episodes(t,1) = all_episodes(t,3);          %greedy price is the quoted price
    end

    %Price of player 2 under the accommodation strategy.
    %If player 1 plays any price below 8, player 2 minimizes player 1's profit.
    %If player 1 plays 8, player 2 also plays 8 when a zero payoff would push
    %the Q-value of 8 below the best alternative; otherwise player 2 undercuts.

    %By default, player 2 plays the strategy minimizing player 1's payoff,
    %pre-recorded in BR:
    all_episodes(t,2) = BR(s,1);

    if s == accommodation_index %player 1 quotes a price of 8
        %Q-value of the price actually played (8) after a zero payoff.
        %Q_n(s,1) is used rather than the overall maximum so that the test
        %is also about price 8 when player 1 reached it by experimenting
        %and 8 is not currently the greedy price.
        q_new = (1-alpha)*Q_n(s,1);

        %Best Q-value among all the other prices.
        q_others    = Q_n(:,1);
        q_others(s) = [];

        if q_new < max(q_others) %after a zero payoff player 1 would prefer another price
            all_episodes(t,2) = accommodation_price; %so player 2 matches at 8
        end
    end

    %Profits in episode t. The investor buys if vc is at least the best
    %price a_min; the profit is then (a_min - v_tilde), split equally if
    %both players quote a_min, and zero otherwise.
    all_episodes(t,6) = min(all_episodes(t,1:2));           %lowest price a_min
    if all_episodes(t,6) <= vc(t)                           %the customer buys
        winners = find(all_episodes(t,6) == all_episodes(t,1:2)); %players quoting a_min
        m = size(winners,2);                                %number of such players
        for i = winners
            %Columns 4 and 5 hold the profits of players 1 and 2.
            all_episodes(t,3+i) = (all_episodes(t,6) - v_tilde(t)) / m;
            %For all other players, or if the customer doesn't buy, the
            %profit stays at the pre-allocated value of zero.
        end
    end

    %Update the Q-vector of player 1 for the price it actually played:
    Q_n(s,1) = alpha*all_episodes(t,4) + (1-alpha)*Q_n(s,1);

end

%Compute the squared values of the variables in all_episodes:
all_episodes(:,7:12) = all_episodes(:,1:6).^2;
%Record the last episode
last_episode = [all_episodes(end,1:2), all_episodes(end,4:5), v_tilde(end,1), l(end,1)];
